function [] = readEvaluations(inFolder, outFolder, varargin)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%READEVALUATIONS Build consumer-adoption data sets from the raw evaluations file.
%
%   readEvaluations(inFolder, outFolder)
%   readEvaluations(inFolder, outFolder, removeProviderRepeatTransacs)
%
% Input files:
%   inFolder/evaluationsData.csv:      eventDate_julian | ProviderId | ConsumerId | ProviderCantonId | ConsumerCantonId
%   inFolder/../covariates/Scope_CantonId.csv     (column CantonId defines the set of geo codes)
%   inFolder/../covariates/periodLabels_day.csv   (2nd column is used as the day labels)
%   inFolder/../covariates/periodLabels_week.csv  (2nd column is used as the week labels)
%
% Optional argument:
%   removeProviderRepeatTransacs (default false). When true, only rows that are
%   also the provider's first retained transaction are kept. This is useful for
%   the robustness check where we model simultaneous adoption by a (R,O) pair.
%
% Output files:
%   outFolder/consumerAdoptionsData_<GeoVar>_ctsTime.mat   (struct timestampsData)
%       -- written as outFolder/pairAdoptionsData_<GeoVar>_ctsTime.mat instead when
%          removeProviderRepeatTransacs is true
%   outFolder/consumerAdoptionsData_<GeoVar>_week.mat      (struct adoptionData)
%   outFolder/consumerAdoptionsData_<GeoVar>_day.mat       (struct adoptionData)
%       -- the week/day files are only written when removeProviderRepeatTransacs is false
%
% Several descriptive statistics are printed to the command window along the way.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Add paths (make_integers is expected to be found there)
addpath('../code/general_funcs');

if length(varargin) >= 1
	removeProviderRepeatTransacs = varargin{1};
else
	removeProviderRepeatTransacs = false;
end

%%%%% Define geographic levels
GEO_VARS = {'CantonId'};
NumGeoVars = length(GEO_VARS);

%%%%% Window of analysis used for the printed statistics: 0 < eventDate_julian < ANALYSIS_WINDOW_END
ANALYSIS_WINDOW_END = 476;
isInAnalysisWindow = @(dates) dates > 0 & dates < ANALYSIS_WINDOW_END;

%%%%% Define paths and input files
file1          = sprintf('%s/evaluationsData.csv', inFolder);
geoDefFile     = sprintf('%s/../covariates/Scope_CantonId.csv', inFolder);
dayLabelsFile  = sprintf('%s/../covariates/periodLabels_day.csv', inFolder);
weekLabelsFile = sprintf('%s/../covariates/periodLabels_week.csv', inFolder);

%%%%% Read input files
% Read geo-def file; geo codes are forced to char so that they can be matched as strings
opts_geo = detectImportOptions(geoDefFile);
opts_geo = setvartype(opts_geo, GEO_VARS, 'char');
Mgeo = readtable(geoDefFile, opts_geo);

% Get period labels (days and weeks) from the second column of each label file
dayLabelsTable = readtable(dayLabelsFile);
dayLabels = table2cell(dayLabelsTable(:,2));
weekLabelsTable = readtable(weekLabelsFile);
weekLabels = table2cell(weekLabelsTable(:,2));
clear dayLabelsTable weekLabelsTable;

% Read transactions file, again forcing the geo columns to char
opts = detectImportOptions(file1);
for gidx = 1:NumGeoVars
	geoColumns = {sprintf('Provider%s', GEO_VARS{gidx}), sprintf('Consumer%s', GEO_VARS{gidx})};
	opts = setvartype(opts, geoColumns, 'char');
end
M1 = readtable(file1, opts);

%%%%% Get geoCodes (sorted unique codes; position in this list is the integer geo index)
geoCodes = cell(1,NumGeoVars);
for gidx = 1:NumGeoVars
	geoCodes{gidx} = unique(table2cell(Mgeo(:, GEO_VARS{gidx})));
end
clear Mgeo;

%%%%% Identify index of first transaction for each consumer
% NOTE(review): "first" means smallest row index, so this assumes the rows of
% evaluationsData.csv are already in chronological order -- TODO confirm.
[consumer2firstIdx, consumerIdxes, unqConsumerIds] = firstRowPerId(table2array(M1(:,'ConsumerId')));

transactionDate = table2array(M1(:,'eventDate_julian'));
inWindow = isInAnalysisWindow(transactionDate);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% Count fraction of transactions for which it is the consumer's first transaction %%%%%
disp('Counting the fraction of transactions for which it is the consumers first transaction...');
disp('... over the full time window:');
% A row is a first-time rental when it is the first row of its own consumer
isFirstRental = (consumer2firstIdx(consumerIdxes) == (1:size(M1,1))');
disp(mean(isFirstRental));

disp('... within the window of analysis:');
disp(mean(isFirstRental(inWindow)));
clear isFirstRental consumerIdxes;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% Count fraction of pro-sumers
% (a prosumer is someone who has had a transaction under both roles)
disp('Counting fraction of pro-sumers...');
disp('... over the full data window:');
disp(prosumerFraction(M1));

% Same statistic, restricted to the same analysis window as every other
% "within the window" statistic in this function
disp('... within the window of analysis:');
disp(prosumerFraction(M1(inWindow,:)));
clear unqConsumerIds;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

% Retain only those rows of M1 that correspond to first-time rentals by the consumer
M1 = M1(sort(consumer2firstIdx),:);
transactionDate = table2array(M1(:,'eventDate_julian'));
inWindow = isInAnalysisWindow(transactionDate);

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% Count number of consumers with at least one transaction
disp('Counting number of consumers with at least one transaction...');
disp('... over the full data window:');
disp(size(M1,1));

disp('... within the window of analysis:');
disp(sum(inWindow));

%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%% Count fraction of first-time transactions for which the consumer rents a car within the same canton
disp('Counting fraction of first-time transactions for which the consumer rents a car within the same canton...');
disp('... over the full data window:');
sameCanton = strcmp(table2cell(M1(:,'ProviderCantonId')), table2cell(M1(:,'ConsumerCantonId')));
disp(mean(sameCanton));

disp('... within the window of analysis:');
disp(mean(sameCanton(inWindow)));
clear sameCanton inWindow;
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Remove transactions that are not the provider's first transaction
if removeProviderRepeatTransacs
	% Among the rows retained so far, keep only the first row of each provider
	provider2firstIdx = firstRowPerId(table2array(M1(:,'ProviderId')));
	M1 = M1(sort(provider2firstIdx),:);
end

%%%%% Get transactionDate, provider_geo and consumer_geo
transactionDate = table2array(M1(:,'eventDate_julian'));
provider_geo = cell(NumGeoVars,1);
consumer_geo = cell(NumGeoVars,1);
for gidx = 1:NumGeoVars
	providerGeoCode = table2array(M1(:, sprintf('Provider%s', GEO_VARS{gidx})));
	consumerGeoCode = table2array(M1(:, sprintf('Consumer%s', GEO_VARS{gidx})));
	% Position of each code in geoCodes{gidx}; ismember yields 0 for codes that are not listed
	[~,provider_geo{gidx}] = ismember(providerGeoCode, geoCodes{gidx});
	[~,consumer_geo{gidx}] = ismember(consumerGeoCode, geoCodes{gidx});

	numUnmatched = sum(provider_geo{gidx} == 0 | consumer_geo{gidx} == 0);
	if numUnmatched > 0
		warning('readEvaluations:unmatchedGeoCode', ...
			'%d transaction(s) have a provider or consumer %s that is not listed in the geo definition file; they are left out of the aggregated counts.', ...
			numUnmatched, GEO_VARS{gidx});
	end
end

%%% Clean up
clear M1 providerGeoCode consumerGeoCode;


%%%%% Output continuous-time data
[sortedTimestamps, sortidxes] = sort(transactionDate); % rows ordered by increasing timestamp
for gidx = 1:NumGeoVars
	GEO_VAR = GEO_VARS{gidx};

	timestampsData = struct();
	timestampsData.geoLabels     = geoCodes{gidx};                            % K x 1
	timestampsData.timestamps    = sortedTimestamps;                          % NumRows x 1
	timestampsData.kk_consumer   = make_integers(consumer_geo{gidx}(sortidxes)); % NumRows x 1 (values between 1 and K)
	timestampsData.kk_provider   = make_integers(provider_geo{gidx}(sortidxes)); % NumRows x 1 (values between 1 and K)

	if removeProviderRepeatTransacs
		outputFile = sprintf('%s/pairAdoptionsData_%s_ctsTime.mat', outFolder, GEO_VAR);
	else
		outputFile = sprintf('%s/consumerAdoptionsData_%s_ctsTime.mat', outFolder, GEO_VAR);
	end
	fprintf('Saving file: %s...\n', outputFile);
	save(outputFile, 'timestampsData', '-v7.3');
end
clear sortedTimestamps sortidxes;

% The pair-adoption variant only needs the continuous-time file
if removeProviderRepeatTransacs
	return;
end


%%%%% Count transactions by location-pair and time
% Loop over time aggregation levels
timeAggregationLevelNames = {'week', 'day'};
for tidx = 1:length(timeAggregationLevelNames)

	timeAggLevel = timeAggregationLevelNames{tidx};
	switch timeAggLevel
		case 'day'
			Tmax = 1826;
			timeLabels = dayLabels;
			timestamp_2_tt_function = @(time_val) ceil(time_val);   % time stamp to discrete day idx
		case 'week'
			Tmax = 260;
			timeLabels = weekLabels;
			timestamp_2_tt_function = @(time_val) ceil(time_val/7); % time stamp to discrete week idx
	end

	%%%%% Convert timestamp to discrete time measure (does not depend on the geo level)
	tt_all = timestamp_2_tt_function(transactionDate);
	isBeforeTimeWindow = ~isnan(tt_all) & tt_all < 1;
	isDuringTimeWindow = ~isnan(tt_all) & tt_all >= 1 & tt_all <= Tmax; % 1 <= tt <= Tmax

	for gidx = 1:NumGeoVars
		GEO_VAR = GEO_VARS{gidx};
		geoLabels = geoCodes{gidx};
		K = length(geoLabels);
		provider_kk_vec = provider_geo{gidx};     % NumTransactions x 1 --> index between 1 and K (0 if code unknown)
		consumer_kk_vec = consumer_geo{gidx};     % NumTransactions x 1 --> index between 1 and K (0 if code unknown)

		% accumarray only accepts positive subscripts, so rows whose geo code
		% could not be matched (index 0) must be left out of the counts
		hasConsumerGeo = consumer_kk_vec > 0;
		hasProviderGeo = provider_kk_vec > 0;

		outputFile = sprintf('%s/consumerAdoptionsData_%s_%s.mat', outFolder, GEO_VAR, timeAggLevel);
		fprintf('Saving file: %s...\n', outputFile);

		%%%%% Count PAST evaluations (before the time window) by location of consumer
		consumer_kk_vec_before = consumer_kk_vec(isBeforeTimeWindow & hasConsumerGeo);
		pastConsumerTransactionsOrig = accumarray(consumer_kk_vec_before, consumer_kk_vec_before, [K 1], @length); % K_consumer x 1

		%%%%% Count PAST evaluations (before the time window) by location of provider
		provider_kk_vec_before = provider_kk_vec(isBeforeTimeWindow & hasProviderGeo);
		pastConsumerTransactionsDest = accumarray(provider_kk_vec_before, provider_kk_vec_before, [K 1], @length); % K_provider x 1

		%%%%% Count evaluations by time, location of provider and location of consumer
		keepDuring = isDuringTimeWindow & hasConsumerGeo & hasProviderGeo;
		tt_during = tt_all(keepDuring);
		transactions = accumarray([tt_during provider_kk_vec(keepDuring) consumer_kk_vec(keepDuring)], ...
			tt_during, [Tmax K K], @length); % Tmax x K_provider x K_consumer

		% Save it to an object
		adoptionData = struct();
		adoptionData.pastConsumerTransactionsOrig = pastConsumerTransactionsOrig; % K_consumer x 1
		adoptionData.pastConsumerTransactionsDest = pastConsumerTransactionsDest; % K_provider x 1
		adoptionData.transactions           = transactions; % Tmax x K_provider x K_consumer
		adoptionData.geoLabels              = geoLabels;
		adoptionData.timeLabels             = timeLabels;

		%%%% Output to file
		save(outputFile, 'adoptionData', '-v7.3');
	end
end

end


function [firstRowIdx, idIdxes, unqIds] = firstRowPerId(ids)
% For each distinct id, return the smallest row index at which it appears.
%   firstRowIdx: NumUnique x 1, first row index of each entry of unqIds
%   idIdxes:     NumRows x 1, position of each row's id within unqIds
%   unqIds:      sorted distinct ids
unqIds = sort(unique(ids));
[~,idIdxes] = ismember(ids, unqIds);
firstRowIdx = accumarray(idIdxes, (1:length(idIdxes))', [length(unqIds) 1], @min);
end


function frac = prosumerFraction(T)
% Fraction of users in table T that appear both as ConsumerId and as ProviderId,
% among all users that appear under at least one of the two roles.
unqConsumers = unique(table2array(T(:,'ConsumerId')));
unqProviders = unique(table2array(T(:,'ProviderId')));
numProsumers = length(intersect(unqConsumers, unqProviders));
numUsersWithTransacs = length(unqProviders) + length(unqConsumers) - numProsumers;
frac = numProsumers / numUsersWithTransacs;
end